package org.docshare.extract;


import cn.edu.hfut.dmic.htmlbot.contentextractor.ContentExtractor;

/**
 * Command-line driver: downloads one web page through {@code Page.getPage},
 * hands the HTML to {@code ContentExtractor.getContentByHtml} and prints the
 * elapsed milliseconds followed by the returned content.
 */
public class Main {

	/** Page fetched when no URL is supplied on the command line. */
	private static final String DEFAULT_URL = "http://www.sqsxs.com/book/12/12731/5236385.html";

	/** Charset name passed to {@code Page.getPage} when none is supplied on the command line. */
	private static final String DEFAULT_CHARSET = "gbk";

	/**
	 * Third argument passed to {@code Page.getPage}.
	 * NOTE(review): presumably a retry/attempt count -- confirm against Page.
	 */
	private static final int FETCH_ATTEMPTS = 3;

	/**
	 * Fetches a page, extracts its content and prints timing plus the result.
	 *
	 * @param args optional overrides: {@code args[0]} is the URL to fetch and
	 *             {@code args[1]} the charset name; either may be omitted, in
	 *             which case the built-in defaults are used. A {@code null}
	 *             or empty array behaves like "no overrides".
	 */
	public static void main(String[] args) {
		String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
		String charset = (args != null && args.length > 1) ? args[1] : DEFAULT_CHARSET;

		String html = Page.getPage(url, charset, FETCH_ATTEMPTS);
		if (html == null) {
			// Report the failed download explicitly instead of letting the
			// replace() call below fail with a NullPointerException.
			// NOTE(review): assumes Page.getPage signals failure with null -- confirm.
			System.err.println("failed to fetch page: " + url);
			return;
		}
		System.out.println("readed");

		// The timer covers the entity clean-up and the extraction, not the download.
		long start = System.currentTimeMillis();
		try {
			// Drop non-breaking-space entities before handing the HTML to the extractor.
			html = html.replace("&nbsp;", "");
			String content = ContentExtractor.getContentByHtml(html);
			long end = System.currentTimeMillis();
			System.out.println(end - start);
			System.out.println(content);
		} catch (Exception e) {
			// Top-level boundary of a command-line tool: report the failure and exit normally.
			e.printStackTrace();
		}
	}

}
